/**** This do-file randomizes schools into treatment for the STEM program.

*/

* --- session setup -----------------------------------------------------------
* start from an empty session, with paging and variable abbreviation turned off
clear all
set more off
set varabbrev off

* close any log left open by an earlier run, then open this run's log
* (the $logfile and $date globals must be defined before this do-file runs)
capture log close
log using "$logfile/randomization_school_$date.log", replace


* Build the pool of candidate randomization seeds.
* 1000 integers in [0, 10^6) are drawn from a fixed master seed; the sorted
* distinct values are stored, space-separated, in the global $randomseed.
drop _all
set seed 142857
set obs 1000
generate value = floor(1e6*runiform())
levelsof value
global randomseed "`r(levels)'"


*******************************************************************************
*						GRADE 9 and 10 STUDENTS
*******************************************************************************
* Load the student-level baseline data and keep only usable schools.
use "$clean/grade910_baseline_reachable_wgrade.dta", clear

* temporarily drop students without school name information
drop if schoolname==""

* list, then drop, responses from schools that do not have grade 9 and 10 students
tab schoolname if level910==0
drop if level910==0

* number of students per school who took the survey (all grades, and grade 10 only)
bys schoolname: egen size=total(responseid~=.)
bys schoolname: egen size10=total(responseid~=. & grade==10)

* survey rate = respondents / total grade 9-10 students, capped at 1.
* Stata treats missing as larger than any number, so a bare "surveyrate>1"
* would also turn a missing rate (missing or zero total910) into 1; missing
* rates are therefore excluded from the cap and stay missing.
g surveyrate = size/total910 
replace surveyrate=1 if surveyrate>1 & !missing(surveyrate)

* drop schools with less than 50% survey rate
* (schools with a missing rate are not dropped by this condition)
drop if surveyrate<0.5

* Recoded copies (prefix i_) of the survey items: 99 becomes missing and 2
* becomes 0. The x9* copies feed the wealth index; the c2* copies are carried
* into the school-level means.
local pcaitems   x9a x9b x9c x9d x9e x9f x9g x9h x9i
local otheritems c2a c2d c2e c2f
foreach item in `pcaitems' `otheritems' {
	generate i_`item' = `item'
	recode i_`item' (99=.) (2=0)
}

* wealth index = score from a principal component analysis of the x9 items
local pcavars
foreach item of local pcaitems {
	local pcavars `pcavars' i_`item'
}
pca `pcavars'
predict wealth_index, score

* age relative to 2021
generate age = 2021 - b_year

* Collapse to one row per school, holding the school-level mean of each variable
collapse (mean) size size10 total910 surveyrate private              ///
	stem_cst stem_jnec stem_gcit stem_shrubtse stem_cnr stem_rtc dn_stem ///
	sex age i_c2a i_c2d i_c2e i_c2f c3c1-c3c7 wealth_index,              ///
	by(schoolname)

* group = 1..6 according to which stem_* indicator equals 1 for the school
* (a later indicator overrides an earlier one; missing if none equals 1)
generate group = .
local code = 0
foreach college in cst jnec gcit shrubtse cnr rtc {
	local ++code
	replace group = `code' if stem_`college'==1
}

* check whether geographic + private stratification is feasible
egen geopri = group(group private)
tabulate geopri

* split every geographic x private cell at the cell median of total910:
* groupsize = 1 at or below the median, 2 above it
* (a missing total910 compares as larger than the median and so gets 2)
levelsof geopri 
global geoprilist "`r(levels)'"
generate groupsize = .
foreach cell of global geoprilist {
	summarize total910 if geopri==`cell', detail
	local cellmedian = r(p50)
	display "`cellmedian'"
	replace groupsize = cond(total910<=`cellmedian', 1, 2) if geopri==`cell'
}

* final strata: geographic group x private x size
egen geoprisize = group(group private groupsize)
tabulate geoprisize
* NOTE(review): stratum 14 is folded into stratum 13 by hard-coded number,
* presumably because it is too small on its own - confirm whenever the data change
replace geoprisize = 13 if geoprisize==14

* -----------------------------------------------------------------------------
* Randomization, stratified by geographic group + private + size (geoprisize)
*
* For every candidate seed in $randomseed:
*   1. draw a treatment assignment within each stratum;
*   2. t-test each balance variable by treatment arm and store the t statistic
*      and the normalized difference;
*   3. discard the seed if a flagged school is treated or if any |t| > 1.64.
* The surviving seed with the smallest maximum |normalized difference| is put
* in local j, and the assignment is regenerated from that seed.
*
* Within-stratum row order decides which school receives which random draw,
* and -bysort- orders tied rows arbitrarily unless a tie-breaking key is given.
* schoolname is unique after the collapse, so it is used as that key; a seed
* then always reproduces the same assignment, including the final redraw.
* -----------------------------------------------------------------------------
cap drop treat*
isid schoolname

* row index (used when labelling the balance table), defined in the same
* deterministic order the data are left in at the end of this section
sort geoprisize schoolname
cap g n=_n

local balancevars size total910 surveyrate private dn_stem sex age i_c2a i_c2d i_c2e i_c2f c3c1 c3c2 c3c3 c3c4 c3c5 c3c6 c3c7 wealth_index
tempname nd curmax bestmax

foreach j of global randomseed {
	set seed `j'
	bys geoprisize (schoolname): g rand_num`j'=runiform()
	bys geoprisize: egen ordering`j'=rank(rand_num`j')
	bys geoprisize: g treat_`j' = ordering`j'>((_N*3-4)/4)
	drop rand_num`j' ordering`j'

	* Compare means and compute the normalized difference.
	* One matrix row per balance variable: (t statistic, normalized difference);
	* rows are stacked with the row-join operator (backslash).
	cap mat drop R`j'
	foreach variable of local balancevars {
		qui ttest `variable', by(treat_`j')
		scalar `nd' = (r(mu_1) - r(mu_2))/sqrt(0.5*(r(sd_1)^2 + r(sd_2)^2))
		mat R`j' = nullmat(R`j') \ (r(t), `nd')
	}

	svmat double R`j'
	rename R`j'1 t_`j'
	rename R`j'2 NorDiff_`j'

	* remove randomization results with ...
	local reject = 0

	* ... schools assigned as treatment but without grade information.
	* NOTE(review): the schools are identified by their row position in
	* schoolname order (39, 40, 60, 70, 71) - confirm these positions whenever
	* the set of schools changes
	sort schoolname
	foreach pos of numlist 39 40 60 70 71 {
		if treat_`j'[`pos']==1 {
			local reject = 1
		}
	}

	* ... a statistically significant difference in means (|t| > 1.64);
	* a missing maximum also rejects, because missing compares as > 1.64
	qui g tabs_`j'=abs(t_`j')
	qui summ tabs_`j'
	if r(max)>1.64 {
		local reject = 1
	}
	drop tabs_`j'

	if `reject' {
		drop *_`j'
	}
}

* keep the result with minimized maximum normalized difference
cap unab ndvars : NorDiff_*
if _rc {
	dis as error "no candidate seed passed the balance checks"
	error 459
}

local seed
scalar `bestmax' = .
foreach v of local ndvars {
	qui summ `v'
	scalar `curmax' = max(abs(r(min)), abs(r(max)))
	* "<=" so that, on an exact tie, the later (larger) seed is kept
	if !missing(`curmax') & `curmax' <= `bestmax' {
		scalar `bestmax' = `curmax'
		* variable name is NorDiff_<seed>; the seed starts at character 9
		local seed = substr("`v'", 9, .)
	}
}
if "`seed'" == "" {
	dis as error "normalized differences are missing for every remaining seed"
	error 459
}

local j = `seed'

drop treat_* t_* NorDiff_* 

* Regenerate the assignment with the chosen seed
set seed `j'
bys geoprisize (schoolname): g rand_num`j'=runiform()
bys geoprisize: egen ordering`j'=rank(rand_num`j')
bys geoprisize: g treat_`j' = ordering`j'>((_N*3-4)/4)
drop rand_num`j' ordering`j'

* leave the data in the order in which n was defined
sort geoprisize schoolname
	
* -----------------------------------------------------------------------------
* Balance table for the chosen seed (local j)
*
* One row per balance variable with: control N, control mean, treatment N,
* treatment mean, difference in means, t statistic, normalized difference.
* The table is written to randomization_school_STEM_balance.csv.
* -----------------------------------------------------------------------------
local balancevars size total910 surveyrate private dn_stem sex age i_c2a i_c2d i_c2e i_c2f c3c1 c3c2 c3c3 c3c4 c3c5 c3c6 c3c7 wealth_index
tempname nd

* rows are stacked with the row-join operator (backslash); any matrix of the
* same name left over from an earlier step is discarded first
cap mat drop R`j'
foreach variable of local balancevars {
	qui ttest `variable', by(treat_`j')
	scalar `nd' = (r(mu_1) - r(mu_2))/sqrt(0.5*(r(sd_1)^2 + r(sd_2)^2))
	mat R`j' = nullmat(R`j') \ (r(N_1), r(mu_1), r(N_2), r(mu_2), r(mu_1) - r(mu_2), r(t), `nd')
}

qui svmat double R`j'
rename R`j'1 CN_`j'
rename R`j'2 Cmean_`j'
rename R`j'3 TN_`j'
rename R`j'4 Tmean_`j'
rename R`j'5 Diff_`j'
rename R`j'6 t_`j'
rename R`j'7 NorDiff_`j'

cap format C* T* Di* t_* NorDiff* %10.3fc

preserve 
* svmat wrote matrix row k into observation k of the current data order, so
* the row index used for the labels is taken from the current position
cap drop n
g n=_n
cap drop size-geoprisize
cap drop schoolname
cap drop variable
g variable=""
replace variable="number of students taking baseline survey" if n==1
replace variable="total number of students" if n==2
replace variable="baseline survey completion rate" if n==3
replace variable="school is private" if n==4
replace variable="distance to the nearest STEM college (km)" if n==5
replace variable="share of male students" if n==6
replace variable="age of students"  if n==7
replace variable="students' liking for Math" if n==8
replace variable="students' liking for Physics" if n==9
replace variable="students' liking for Chemistry"  if n==10
replace variable="students' liking for Biology" if n==11
replace variable="share of students consulting parents about educareer" if n==12
replace variable="share of students consulting siblings about educareer" if n==13
replace variable="share of students consulting relatives about educareer" if n==14
replace variable="share of students consulting friends about educareer" if n==15
replace variable="share of students consulting neighbors about educareer" if n==16
replace variable="share of students consulting teachers about educareer" if n==17
replace variable="share of students consulting others about educareer" if n==18
replace variable="wealth index" if n==19

* row 20 is moved to the top and labelled "randomization seed"; it carries no
* statistics - the seed itself is the suffix of the statistic column names
keep if n<=20
replace n=0 if n==20
order variable
sort n
replace variable="randomization seed" if n==0
cap drop n 
cap drop treat*

export delimited using "$randomization/randomization_school_STEM_balance.csv", nolabel datafmt replace
restore 

* School list with the treatment assignment.
* The sort has to run before -keep-: geopri is not among the kept variables,
* so sorting afterwards fails (and under -capture- failed silently, leaving
* the exported list unsorted).
sort geopri treat* schoolname 
keep schoolname treat* size size10 total910 geoprisize
export excel using "$randomization/randomization_school_STEM_list.xlsx", replace firstrow(variables)



